leaguedf <- read_csv('../../data_sets/S13LeagueOfLegendsData.csv', 
                      col_types=c('c', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'c'), 
                      col_names=c('rowno', 'Name', 'Class', 'Role', 'Tier', 'Score', 'Trend', "WinRate", "RoleRate", "PickRate", "BanRate", 'KDA', 'Patch'), skip=1) %>%
  column_to_rownames("rowno") %>% 
  mutate(PickBanRate = PickRate + BanRate, 
         Patch = as.numeric(str_replace(Patch, '(.*?)_(.*?)', '')), 
         Role = str_to_title(Role))
leaguedf$Tier = as.factor(leaguedf$Tier) %>%
  fct_relevel(c("God", "S", "A", "B", "C", "D"))


stats <- leaguedf %>% group_by(Name) %>%
  summarize(sdWinRate = sd(WinRate), sdPickBanRate = sd(PickBanRate))
leaguedf <- inner_join(stats, leaguedf, 'Name')
#Question 10: Volatility
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

plot1a <- leaguedf %>%
  group_by(Name) %>% filter(Class != "NULL") %>%
  ggplot() + geom_point(mapping=aes(x=sdWinRate, y=sdPickBanRate, color = Class)) + facet_wrap(~ Role) + xlab("Standard Deviation of Win Rate") + ylab("Standard Deviation of Pick Ban Rate")

plot2a <- leaguedf %>%
  group_by(Class, Role, Patch) %>%
  summarize(sdPickBanRate = median(sdPickBanRate), sdWinRate = median(sdWinRate)) %>%
  ggplot() + 
  geom_line(mapping = aes(x = Patch, y = sdWinRate, color = Class)) + 
  labs(y = "Win Rate Standard Deviation", x = "Patch", title = "") + facet_wrap(~ Role) + 
  theme(legend.position = "none") 
plot2b <- leaguedf %>%
  group_by(Class, Role, Patch) %>%
  summarize(sdPickBanRate = median(sdPickBanRate), sdWinRate = median(sdWinRate)) %>%
  ggplot() + 
  geom_line(mapping = aes(x = Patch, y = sdPickBanRate, color = Class)) + 
  labs(y = "Pick Ban Rate Standard Deviation", x= 'Patch', title = "") + facet_wrap(~ Role) + 
  theme(legend.position = "none")

pw1 <- plot1a & plot_annotation(
  title = "Volatility Overview by Champion"
) &  
  theme(plot.title = element_text(hjust = 0.5, size = 15, face = 'bold')) 
pw2 <- (plot2a | plot2b) &
  plot_annotation(title = "Volatility Median by Class") & 
    theme(plot.title = element_text(hjust = 0.5, size = 15, face = 'bold'))

(wrap_elements(panel = pw1) / wrap_elements(panel = pw2)) & 
  plot_layout(guides = "collect")

#Question 9: Tier Prediction
Tier_Color <- c(God = "#abd63d", S ="lightblue",A =   "#bc0a7d",B =  "purple", C = "red", D = "yellow")
plot1a <- leaguedf %>% ggplot(mapping = aes(x = PickBanRate)) + 
  geom_histogram(aes(fill = Tier, y = after_stat(density)), 
                 alpha = 0.3,
                 bins = 50, color = "black") + 
  geom_density(mapping = aes(fill = Tier), alpha = 0.25) + 
  scale_y_continuous(name = "Density", sec.axis = sec_axis(~.*length(leaguedf), name = "Count")) + 
  facet_wrap(~ Tier) +
  theme(legend.position = "none") + 
  labs(title = "Faceted by Tier", x = "Pick Ban Rate", y = "Density") + 
  scale_fill_manual(values = Tier_Color)

plot2a <- leaguedf %>%
  ggplot(mapping = aes(x = PickBanRate)) + 
  geom_histogram(aes(fill = Tier, y = after_stat(density)), 
                 alpha = 0.3,
                 color = "black",
                 position = "identity",
                 bins = 50) + 
  geom_density(mapping = aes(fill = Tier), alpha = 0.25) + 
  labs(title = "Complete Plot", x = "Pick Ban Rate", y = "Density") + 
  scale_y_continuous(name = "Density", sec.axis = sec_axis(~.*length(leaguedf), name = "Count")) +
  scale_fill_manual(values = Tier_Color)

plot3a <- leaguedf%>% 
  ggplot(mapping = aes(x = PickBanRate)) + 
  geom_histogram(aes(fill = Tier), alpha = 0.3, color = "black") + 
  labs(title = "Stacked Histogram", x = "Pick Ban Rate", y = "Count") + 
  scale_fill_manual(values = Tier_Color) + 
  theme(legend.position = "none")


plot1a / plot2a / plot3a + plot_annotation(
  title = "Tier Distribution with Pick Ban Rate",
  subtitle = "Histogram overlayed with density plot",
  caption = "You can see that D and C tiers are very close to each other, with C only being slightly ahead. \nThe others are more spread out, and God has a section that is almost entirely it's own."
)

#Win rate and Pick Ban Rate over time seperated by Role
PatchRoleStats <- leaguedf %>% filter(Role != "Jungle") %>%
  group_by(Role, Patch) %>%
  summarize(meanWinRate = mean(WinRate), meanPBRate = mean(PickBanRate),.groups='keep')

plot1a <- leaguedf %>% filter(Role != "Jungle") %>%
  ggplot() + geom_line(mapping=aes(x=Patch, y=WinRate, color=Name, alpha=0.001)) +
  geom_line(data = PatchRoleStats, mapping = aes(x=Patch, y=meanWinRate), color="black")+ 
  theme(legend.position="none") + facet_wrap( ~ Role) + ggtitle("Win Rate over Time seperated by Role") + xlab('') + ylab("")

plot1b <- leaguedf %>% filter(Role != "Jungle") %>%
  ggplot() + 
  geom_line(mapping=aes(x=Patch, y=PickBanRate, color=Name, alpha=0.001)) + 
  geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanPBRate), color="black") + 
  theme(legend.position="above") + facet_wrap( ~ Role) + ggtitle("PBRate over Time Seperated by Role") + xlab('') + ylab("")
#Win Rate and Pick Ban Rate over Time for just Heimerdinger seperated by Role
plot2a <- leaguedf %>% filter(Name == "Heimerdinger") %>%
  ggplot() + geom_line(mapping = aes(x = Patch, y = WinRate, alpha=0.5), color="green") + 
  geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanWinRate), color="black") + 
  facet_wrap(~ Role) + theme(legend.position="none") + xlab("Patch") + ylab("")

plot2b <- leaguedf %>% filter(Name == "Heimerdinger") %>%
  ggplot() + geom_line(mapping = aes(x = Patch, y = PickBanRate), color="green") + 
  geom_line(data = PatchRoleStats, mapping=aes(x=Patch, y=meanPBRate), color="black") + 
  facet_wrap("Role") + theme(legend.position="none") + scale_x_discrete("Patch", labels = 1:24) + ylab('')


patched <- (plot1a & plot1b) / (plot2a & plot2b)
patched + plot_annotation(
  title = "Time Series plotting for Winrate and PickBanrate",
  subtitle = "With a special appearance by Heimerdinger!",
  caption = "Lines that dissapear and reappearindicate that the chapmion did not have a high enough play rate to be considered for that role."
)

Is this useful?????? What do you gain from it??

#Correlation of PBR for champions.
#TODO: Add Correlation for winrate, does it have any impact?
PbrCorrelation <- MakeCorrelationDf("PickBanRate", "Pbr") %>%
  group_by(Champion2) %>%
  arrange(PbrCorrelation) %>%
  mutate(label = case_when(
    row_number() <= 1 ~ str_to_title(str_replace(Champion1, '\\.', ' ')),
    row_number() > n() - 1 ~ str_to_title(str_replace(Champion1, '\\.', ' ')), # This adds a Space into the name where the . is  and uncapitalizes the second role
    Champion2 == "Tahm Kench.Support" & PbrCorrelation > 0.68 ~ "Senna Support", # This is an outlier so labeling is justified, especially since it helps show the part of the plot
    TRUE ~ as.character(NA)
  )) 

PbrCorrelation %>% filter(Champion2 %in% c("Tahm Kench Support", "Senna Support", "Ashe Adc")) %>% 
  ggplot(mapping = aes(x=Champion2, y = PbrCorrelation)) + 
  geom_boxplot() + 
  ggtitle("PBR Correlation Boxplot")+ 
  scale_x_discrete(labels = c("Ashe Adc", "Senna Support", "Tahm Kench Support")) + 
  labs(x = "", y = "Pick Ban Rate Correlation Coefficient", caption = "Minimum and Maximum corelation coefficients are annotated, as well as Senna Support for Tahm Kench Support in order\n to best visualize how the strength of certain counters, replacements, and synergies effect Pick Ban Rate.")  + 
  geom_text(aes(label = label), na.rm = TRUE, hjust = -0.1, size = 3) 

#Correlation Comparisons!
#Probably change to hex or something better for density than a scatter

wrCorr <- MakeCorrelationDf(varname = "WinRate")
pickCorr<- MakeCorrelationDf(varname = "PickRate")
banCorr <- MakeCorrelationDf(varname = "BanRate")
rule <- join_by(Champion1 == Champion1, Champion2 == Champion2)

TotalCorr <- inner_join(PbrCorrelation, wrCorr, by = rule) %>%
  select(-"label") %>%
  inner_join(pickCorr, rule) %>%
  inner_join(banCorr, rule)

plot1a <- TotalCorr %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PbrCorrelation), color = "#dba22e", size = 0.5) + 
  labs(title = "Overall Plot", x = "", y = "Pick Ban Rate Correlation")
plot1b <- TotalCorr %>%
  filter(grepl("Support", Champion1) & grepl("Adc", Champion2)) %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PbrCorrelation), color = "#dba22e", size = 0.5) + 
  labs(title = "Support and Adc only", y = "", x = "")

plot2a <- TotalCorr %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PickRateCorrelation), color = "#bc0a7d", size = 0.5) + 
  labs(title = "", x = "", y = "")

plot2b <- TotalCorr %>%
  filter(grepl("Support", Champion1) & grepl("Adc", Champion2)) %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PickRateCorrelation), color = "#bc0a7d", size = 0.5) + 
  labs(title = "", x = "", y = "")

plot3a <- TotalCorr %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = BanRateCorrelation), color = "#abd63d", size = 0.5) + 
  labs(title = "", x = "", y = "")

plot3b <- TotalCorr %>%
  filter(grepl("Support", Champion1) & grepl("Adc", Champion2)) %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = BanRateCorrelation), color = "#abd63d", size = 0.5) + 
  labs(title = "", x = "", y = "")

plot4a <- TotalCorr %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PbrCorrelation), color = "#dba22e", size = 0.5, alpha = 0.2) + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PickRateCorrelation), color = "#bc0a7d", size = 0.5, alpha = 0.2) + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = BanRateCorrelation), color = "#abd63d", size = 0.5, alpha = 0.2) + 
  labs(x = "Win Rate Correlation", y = "")

plot4b <- TotalCorr %>%
  filter(grepl("Support", Champion1) & grepl("Adc", Champion2)) %>%
  ggplot() + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PbrCorrelation, color = 'a'), size = 0.5, apha = 0.2) + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = PickRateCorrelation, color = 'b'), size = 0.5, alpha = 0.2) + 
  geom_point(mapping = aes(x = WinRateCorrelation, y = BanRateCorrelation, color = 'c'), size = 0.5, alpha = 0.2) + 
  labs(x = "Win Rate Correlation",y = "") + 
  scale_color_manual(values = c("a" = "#dba22e", "b" = "#bc0a7d", "c" = "#abd63d"), name = "Correlation", labels = c("Pick Ban Rate", "Ban Rate", "Pick Rate"), guide = "legend") + 
  theme(legend.position = "right")

design = "
129
349
569
789"

pw <- plot1a + plot1b + plot2a + plot2b + plot3a + plot3b + plot4a + plot4b + plot_layout(design = design, guides = "collect") + plot_annotation(
  title = "Correlation Comparisons",
  subtitle = "For Pick Ban Rate, Pick Rate, and Ban rate compared to Win Rate"
)

pw

leaguedf %>%
  select("Name", "PickBanRate", "WinRate", "Role", "RoleRate", "Class", "Patch") %>%
  filter(!(Class == "NULL")) %>%
  group_by(Role) %>%
  group_map( ~ plot_ly(data = .,
      x = ~ PickBanRate,
      y = ~ WinRate,
      color = ~ Class,
      text = ~ Name,
      frame = ~ Patch, 
      hoverinfo = "text",
      type = "scatter",
      mode = "markers", 
      marker = list(size = ~ RoleRate*5)
      ), .keep = TRUE) %>%
  subplot(nrows = 2, shareX = TRUE, shareY=TRUE, margin=0.03) %>%
  layout(showlegend = FALSE, title = 'Pick Ban Rate vs. Win Rate by Patch seperated by Role',
         plot_bgcolor='#e5ecf6', 
         xaxis = list( 
           zerolinecolor = '#ffff', 
           zerolinewidth = 2, 
           gridcolor = 'ffff'), 
         yaxis = list( 
           zerolinecolor = '#ffff', 
           zerolinewidth = 2, 
           gridcolor = 'ffff'),
         margin = 0.07) %>%
  layout(annotations = annotations)
corr_data <- leaguedf %>%
  select(c("WinRate", "Role", "KDA")) %>%
  pivot_wider(names_from = Role,
              values_from = Role,
              values_fn = function(x) TRUE,
              values_fill = FALSE)

model <- nls(WinRate ~ a*KDA + b*Top*KDA^2 + c*Mid*KDA^2 + d*Jungle*KDA^2 + e*Support*KDA^2 + f*Adc*KDA^2 + g, data = corr_data, start = list(a = 0.01, b = 0.01, c = 0.01, d = 0.01, e = 0.01, f = 0.01, g = 0.4))

predict_wr <- function(kda, Top, Mid, Jungle, Support, Adc) {
  predict(model, newdata = data.frame(KDA = kda, Top = Top, Mid = Mid, Jungle = Jungle, Support = Support, Adc = Adc))
}

train_control <- trainControl(method = "repeatedcv", number = 25, repeats = 5)
nls1 <- train(WinRate ~ predict_wr(KDA, Top, Mid, Jungle, Support, Adc), data = corr_data,
                                         method = "lm", 
                                         trControl = train_control,
                                         preProcess = c("center", "scale"))

summary(nls1) 
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.096415 -0.009614  0.000986  0.011024  0.059827 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       0.503830   0.000225 2239.54
## `predict_wr(KDA, Top, Mid, Jungle, Support, Adc)` 0.006455   0.000225   28.69
##                                                   Pr(>|t|)    
## (Intercept)                                         <2e-16 ***
## `predict_wr(KDA, Top, Mid, Jungle, Support, Adc)`   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01659 on 5435 degrees of freedom
## Multiple R-squared:  0.1315, Adjusted R-squared:  0.1314 
## F-statistic: 823.2 on 1 and 5435 DF,  p-value: < 2.2e-16
summary(model)
## 
## Formula: WinRate ~ a * KDA + b * Top * KDA^2 + c * Mid * KDA^2 + d * Jungle * 
##     KDA^2 + e * Support * KDA^2 + f * Adc * KDA^2 + g
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## a  0.0165073  0.0014125  11.687  < 2e-16 ***
## b  0.0011164  0.0002740   4.075 4.66e-05 ***
## c  0.0003812  0.0002622   1.454  0.14608    
## d -0.0009877  0.0002625  -3.762  0.00017 ***
## e -0.0012356  0.0002554  -4.838 1.35e-06 ***
## f  0.0009030  0.0002786   3.241  0.00120 ** 
## g  0.4662019  0.0021234 219.556  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0166 on 5430 degrees of freedom
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 3.226e-08
MidGrid <- expand.grid(KDA = seq(0,5, length.out = 501), Mid = c(TRUE), Top = FALSE, Adc = FALSE, Support = FALSE, Jungle = FALSE)
MidGrid$WinRate <- predict(nls1, MidGrid)
JungleGrid <- expand.grid(KDA = seq(0,5, length.out = 501), Mid = FALSE, Top = FALSE, Adc = FALSE, Support = FALSE, Jungle = TRUE)
JungleGrid$WinRate <- predict(nls1, JungleGrid)
TopGrid <- expand.grid(KDA = seq(0,5, length.out = 501), Mid = FALSE, Top = T, Adc = FALSE, Support = FALSE, Jungle = FALSE)
TopGrid$WinRate <- predict(nls1, TopGrid)
AdcGrid <- expand.grid(KDA = seq(0,5, length.out = 501), Mid = F, Top = FALSE, Adc = T, Support = FALSE, Jungle = FALSE)
AdcGrid$WinRate <- predict(nls1, AdcGrid)
SupportGrid <- expand.grid(KDA = seq(0,5, length.out = 501), Mid = F, Top = FALSE, Adc = FALSE, Support = T, Jungle = FALSE)
SupportGrid$WinRate <- predict(nls1, SupportGrid)

plot1a <- corr_data %>% filter(Mid == TRUE) %>%
  ggplot(mapping = aes(x = KDA, y = WinRate)) + 
  geom_point(alpha = 0.1) + 
  geom_line(data = MidGrid, mapping = aes(x = KDA, y = WinRate), color = "blue") + 
  ggtitle("Mid") 

plot1b <- corr_data %>% filter(Adc == TRUE) %>%
  ggplot(mapping = aes(x = KDA, y = WinRate)) + 
  geom_point(alpha = 0.1) + 
  geom_line(data = AdcGrid, mapping = aes(x = KDA, y = WinRate), color = "red")+ 
  ggtitle("Adc")

plot1c <- corr_data %>% filter(Support == TRUE) %>%
  ggplot(mapping = aes(x = KDA, y = WinRate)) + 
  geom_point(alpha = 0.1) + 
  geom_line(data = SupportGrid, mapping = aes(x = KDA, y = WinRate), color = "green") + 
  ggtitle("Support")

plot2a <- corr_data %>% filter(Jungle== TRUE) %>%
  ggplot(mapping = aes(x = KDA, y = WinRate)) + 
  geom_point(alpha = 0.1) + 
  geom_line(data = JungleGrid, mapping = aes(x = KDA, y = WinRate), color = "purple") + 
  ggtitle("Jungle")

plot2b <- corr_data %>% filter(Top == TRUE) %>%
  ggplot(mapping = aes(x = KDA, y = WinRate)) + 
  geom_point(alpha = 0.1) + 
  geom_line(data = TopGrid, mapping = aes(x = KDA, y = WinRate), color = "orange") + 
  ggtitle("Top")

pw <- (plot1a & plot1b) / (plot1c) / (plot2a & plot2b) 

pw + plot_annotation(
  title = "Quadratic Regression of KDA and Winrate",
  subtitle = "Seperated by Role",
  caption = "Support has a strong negative coefficient for the 2nd degree, emphasizing that Support sees lower payoffs than other roles for high KDA."
)

RoleByClass <- leaguedf %>%
  mutate(PlayRate = RoleRate * PickRate / 2) %>% #We divide by 2 because there are two of each role in every game.
  group_by(Patch, Role, Class) %>%
  summarize(Visibility = sum(PlayRate), WinRate = mean(WinRate)) %>%
  ungroup()

head(RoleByClass, 5) #Note that the sum of playrate will not = 1, as there are champions that are played in each role that aren't represented in the dataset, because of low play rates.
## # A tibble: 5 × 5
##   Patch Role   Class    Visibility WinRate
##   <dbl> <chr>  <chr>         <dbl>   <dbl>
## 1     1 Adc    Fighter     0.0370    0.499
## 2     1 Adc    Mage        0.00402   0.519
## 3     1 Adc    Marksman    0.825     0.504
## 4     1 Jungle Assassin    0.210     0.486
## 5     1 Jungle Fighter     0.291     0.499
plot1a <- RoleByClass %>% 
  ggplot() + 
  geom_boxplot(mapping = aes(x = Role, color = Class, y = Visibility)) + 
  labs(title = "Class Visibility", y = "Visibility", x = "") + 
  theme(legend.position  = "none")
plot1b <- RoleByClass %>%
  ggplot() + 
  geom_boxplot(mapping = aes(x = Role, color = Class, y = WinRate)) + 
  labs(title = "Class Winrate", y = "Mean Win Rate by Patch", x = "") + 
  theme(legend.position = "none")
plot2a <- RoleByClass %>%
  group_by(Class, Role) %>%
  summarize(meanwr = mean(WinRate), sdwr = sd(WinRate), meanvis = mean(Visibility)) %>%
  ggplot() + 
  geom_point(mapping = aes(x = meanvis, y = meanwr, size = sdwr, color = Class)) + 
  labs(title = "Win Rate vs Visibility", 
       x = "Visibility", y = "Win Rate", size = "Standard Deviation\nof Win Rate") + 
  facet_wrap(~ Role) + 
  theme(legend.position = "none")

plot2b <- RoleByClass %>%
  group_by(Class) %>%
  summarize(total = sum(Visibility), median = median(WinRate))  %>%
  mutate(total = total/sum(total)) %>%
  ggplot(mapping = aes(x = 4, 
                       y = total)) + 
  geom_col(mapping = aes(fill = Class),
           color = "black",
           position = "fill") +
  geom_label(
    aes(label = paste(round(total, 2)*100, '%', sep = ''), fill = Class),
    position = position_fill(vjust = 0.5),  alpha = 0, label.size = NA,
    show.legend = "false") +
  coord_polar(theta = "y") +
  lims(x = c(0.2, 4.5)) + 
  theme_void() + 
  labs(title = "Visibility by Class Overall")
  

#TODO Fix legend to make more legible and edit caption

(plot1a + plot1b) / (plot2a & plot2b) + plot_annotation(title = "Boxplots analyzing Role and Class relationships with pickrate",
                                  caption = "You can see that Marksman is consisently heavily played in ADC (> 80%) yet the mean win rate falls lower than Mage at every patch. \n However, Marksman has an incredibly small standard deviation for the winrate, meaning it's very consistent over the long term, while Mage is has the largest in the group.")  + 
  plot_layout(guides = "collect")